################################################################################ 
################################################################################ 
#################### Meta-analysis on the Contact Hypothesis ###################
################################################################################ 
################################################################################ 

# This script performs the meta-analysis
# for the project 
# Meta-analysis on the Contact Hypothesis
# by Gwen-Jiro Clochard


################################################################################ 
################################## Import data #################################
################################################################################ 


all_measures <- 
  read_excel(paste(pathdata, "Clean//All_measures.xlsx", sep="//"))

indiv <- 
  read_excel(paste(pathdata, "Clean//Individual_papers.xlsx", sep="//"))


################################################################################ 
################################## Meta setup ##################################
################################################################################ 

pt_bright <- pt_scheme_qualitative("bright")
pt_hc     <- pt_scheme_qualitative("high contrast")
pt_mc     <- pt_scheme_qualitative("medium contrast")
jp_col    <- jp_scheme()
uo_blue   <- c("#1B1172")


################################################################################ 
############################ Meta-analytic estimate ############################
################################################################################ 


##### Meta-Analytic Estimate #####


### Format data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id))

dat <- as.data.frame(dat)


### Fit models

fit <- list()
fit[[1]] <- re_meta(dat, cluster = "paper_id", multi.level = FALSE)
fit[[2]] <- re_meta(dat, cluster = "paper_id", multi.level = TRUE)


### Extract and format results

summary_re_out <- rbind(
  summary_re_meta(fit[[1]]),
  summary_re_meta(fit[[2]])
) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame()

summary_re_out <- summary_re_out %>%
  rename(RE = V1, ML = V2) %>%
  rownames_to_column("Statistic")


### Exporting table ###

unique(summary_re_out$Statistic)

summary_re_out <- summary_re_out %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2     = "Tau2",
      i2       = "I2",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


print(
  xtable(summary_re_out),
  file = file.path(paste(pathtables, "meta_table.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)
  


################################################################################ 
################### Modified forest plot (from Popova et al) ###################
################################################################################ 


### Meta estimate

label <- "effect"

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,
    v  = se_effect^2, 
    paper_id = as.integer(factor(Paper_id))
  ) %>%
  filter(!is.na(e) & !is.na(se)) %>%
  group_by(paper_id) %>%
  mutate(n_obs = n()) %>%
  ungroup() %>%
  mutate(
    obs_id  = row_number()
  )

fit <- rma.mv(
  yi     = e,
  V      = v,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

vcov <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
ci   <- conf_int(fit, vcov = vcov, test = "Satterthwaite")


### Forest plot data

dat_forest <- dat %>%
  group_by(paper_id) %>%
  mutate(n_obs = n()) %>%
  ungroup() %>%
  select(paper_id, paper_key, n_obs) %>%
  distinct() %>%
  left_join(
    dat %>%
      filter(n_obs == 1) %>%
      mutate(
        ci_l   = e - 1.96 * se,
        ci_u   = e + 1.96 * se,
        s_ci_l = e - 1.96 * se,
        s_ci_u = e + 1.96 * se
      ) %>%
      select(paper_id, beta = e, se, ci_l, ci_u, s_ci_l, s_ci_u),
    by = "paper_id"
  )

idx <- unique(dat_forest$paper_id)


### Aggregate studies with multiple observations

for (i in seq_len(nrow(dat_forest))) {
  if (dat_forest$n_obs[i] > 1) {
    temp_dat <- dat %>%
      filter(paper_id == idx[i])
    
    set.seed(12345)
    temp_fit <- rma(
      yi     = e,
      vi     = v,
      data   = temp_dat,
      method = "REML"
    )
    
    dat_forest$beta[i] <- as.numeric(temp_fit$beta)
    dat_forest$se[i]   <- as.numeric(temp_fit$se)
    dat_forest$ci_l[i] <- as.numeric(temp_fit$ci.lb)
    dat_forest$ci_u[i] <- as.numeric(temp_fit$ci.ub)

    b <- as.numeric(temp_fit$beta)
    dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
    dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    
  }
}

dat_forest <- dat_forest %>%
  mutate(across(c(beta, se, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric))


### Ordering

dat_forest <- dat_forest %>%
  arrange(-beta) %>%
  mutate(i = row_number())

y_range <- c(
  1.1 * min(c(dat_forest$ci_l, dat_forest$s_ci_l)),
  1.1 * max(c(dat_forest$ci_u, dat_forest$s_ci_u))
)


### Study-level forest
overall_beta <- as.numeric(fit$beta)

str(dat_forest[, c("beta", "ci_l", "ci_u", "s_ci_l", "s_ci_u")])
sapply(dat_forest[, c("beta", "ci_l", "ci_u", "s_ci_l", "s_ci_u")], class)

p_1 <- ggplot(dat_forest, aes(x = reorder(paper_key, i), y = beta)) +
  geom_hline(yintercept = 0, color = pt_bright$grey) +
  geom_hline(yintercept = overall_beta, linetype = "dashed") +
  geom_point(color = pt_hc$blue) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                color = pt_hc$blue, width = 0.4) +
  geom_errorbar(aes(ymin = s_ci_l, ymax = s_ci_u),
                color = pt_hc$blue, alpha = 0.33, width = 0.8) +
  geom_text(aes(x = i, y = y_range[2]),
            label = dat_forest$n_obs, size = 3.5, hjust = 1) +
  coord_flip(ylim = y_range, clip = "off") +
  ylab("Effect size") +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x  = element_blank(),
        axis.ticks.x = element_blank(),
        axis.line.x  = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y  = element_text(size = 10),
        legend.position = "none",
        plot.margin = margin(0, 20, 0, 20, unit = "pt"))
p_1


### Summary effect

summary_effect <- data.frame(
  citation = "Summary",
  beta     = fit$beta,
  ci_l     = ci$CI_L,
  ci_u     = ci$CI_U
)

p_2 <- ggplot(summary_effect, aes(x = citation, y = beta)) +
  geom_hline(yintercept = 0, color = pt_bright$grey) +
  geom_point(color = pt_hc$blue, shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                color = pt_hc$blue, width = 0.4) +
  coord_flip(ylim = y_range, clip = "off") +
  ylab("Effect size") +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y  = element_text(size = 10),
        legend.position = "none",
        plot.margin = margin(0, 20, 0, 20, unit = "pt"))  
  
p_2


### Combine and save

fig_forest <- plot_grid(p_1, p_2,
                          ncol = 1,
                          rel_heights = c(1, 0.15),
                          align = "vh")
fig_forest

ggsave(
  file = file.path(paste(pathfigures, "effect_forest.pdf", sep = "//")),
  plot = fig_forest,
  width = 7,
  height = 9
)


################################################################################ 
############################# Standard forest plot #############################
################################################################################ 


### Get the meta-information from the multi-level model

meta_mean <- fit[[2]]$fit$b[1]
ci_lower  <- fit[[2]]$ci[1]
ci_upper  <- fit[[2]]$ci[2]


### Create the forest plot

# Create confidence intervals (95%)
all_measures$ci_lower <- all_measures$effect - 1.96 * all_measures$se_effect
all_measures$ci_upper <- all_measures$effect + 1.96 * all_measures$se_effect


# Order by effect size
all_measures <- all_measures %>%
  arrange(effect)

# Forest plot
fig_forest <- 
  ggplot(all_measures, aes(x = effect, y = reorder(name, -effect))) +
  geom_point() +
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  
  # Meta-analytic mean (solid red)
  geom_vline(xintercept = meta_mean, color = "red", linetype = "solid") +
  
  # 95% CI (red dotted)
  geom_vline(xintercept = ci_lower, color = "red", linetype = "dotted") +
  geom_vline(xintercept = ci_upper, color = "red", linetype = "dotted") +
  
  labs(
    x = "Effect size",
    y = " ",
    title = " "
  ) +
  theme_classic()

fig_forest

ggsave(
  file = file.path(paste(pathfigures, "raw_forest.pdf", sep = "//")),
  plot = fig_forest,
  width = 7,
  height = 9
)


################################################################################ 
############################### Publication bias ###############################
################################################################################ 


##### OLS regressions #####

lm1 <- 
  lm(formula = effect ~ se_effect, 
     data = all_measures)
summary(lm1)

lm2 <- 
  lm(formula = effect ~ N, 
     data = all_measures)
summary(lm2)

lm3 <- 
  lm(formula = effect ~ Published, 
     data = all_measures)
summary(lm3)

lm4 <- 
  lm(formula = effect ~ registration, 
     data = all_measures)
summary(lm4)


# Alternative specification
# summary(lm(
#   data = all_measures, 
#   formula = se_effect ~ alt_sample
# ))
# iv_model <- ivreg(effect ~ se_effect | alt_sample, 
#                   data = all_measures)
# summary(iv_model, diagnostics = TRUE)


### LaTeX Table

collabels = c("Standard Error", "Sample Size", "Published", "Registration")

covlabels = c("SE", "N", "Published", "Registration")

notes = "The dependent variable is the standardized effect size. * p < 0.05, ** p < 0.01, *** p < 0.005. Standard errors in parentheses."

title = "Worrying signs of publication bias and/or file drawer problem"

label = "tab: publication bias"

stargazer(lm1, lm2, lm3, lm4,
          type='latex', 
          table.placement = "htbp",
          title = title,
          style = "all2",
          label = label,
          # align = TRUE,
          column.labels = collabels,
          covariate.labels = covlabels,
          # keep = keeps,
          keep.stat = c("rsq", "n"),
          summary=FALSE,
          dep.var.caption = NULL,
          mean.sd = TRUE,
          digits = 3,
          notes = notes,
          out = paste(pathtables, "Publication_bias.tex", sep="//"),
          notes.append = FALSE,
          notes.align = "l",
          header = FALSE, 
          star.cutoffs = c(0.05, 0.01, 0.005)
)


##### LMA #####


## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,
    v  = se_effect^2, 
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se))

dat <- as.data.frame(dat)


## Standard RE meta-analysis

temp_fit_re <- rma(yi = e, sei = se, data = dat, method = "REML")


## Multilevel RE

temp_fit_ml <- re_meta(dat, cluster = "paper_id", multi.level = TRUE)


## Limit meta-analysis (publication bias)

lma <- rma_limit(temp_fit_re, method.adjust = "beta0")


## Funnel plot

fig_funnel <- ggplot(dat, aes(x = e, y = 1 / se)) +
  geom_vline(xintercept = 0,
             color = "grey75",
             linewidth = 1,
             linetype = "solid") +
  geom_vline(xintercept = as.numeric(temp_fit_ml$fit$beta),
             color = pt_bright$red,
             linewidth = 1,
             linetype = "dashed") +
  # geom_vline(xintercept = lma$Effect.adjust,
  #            color = pt_bright$red,
  #            linewidth = 1,
  #            linetype = "dotdash") +
  geom_point(size = 2, color = uo_blue, alpha = 0.5) +
  scale_y_log10() +
  annotation_logticks(sides = "l") +
  xlab("Effect size") +
  ylab("Precision (1 / SE)") +
  theme_classic()

fig_funnel


ggsave(
  plot  = fig_funnel,
  file  = file.path(paste(pathfigures, "effect_funnel.pdf", sep = "//")),
  width = 4,
  height = 3
)


## Publication Bias evaluation

summary_lma_out <- summary_lma(lma) %>%
  rename(variable = statistic,
         effect   = value)

print(
  xtable(summary_lma_out),
  file = file.path(paste(pathtables, "table_lma.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE,
  sanitize.text.function = identity
)



##### MAIVE #####

all_measures <-
  all_measures %>%
  mutate(
    inv_variance = 1/se_effect^2, 
    variance = se_effect^2, 
    log_n = log(N), 
    inv_n = 1/N, 
    inv_log_N = 1/log(N)
  )

lm1 <-
  lm(
    data = all_measures, 
    formula = variance ~ inv_n, 
    weights = N
  )
summary(lm1)


##### RTMA #####

all_measures <- 
  all_measures %>%
  mutate(
    z_score = effect / se_effect
  )

plt <-
  ggplot(all_measures, aes(x = z_score)) +
  geom_density(alpha = 0.4, bw = .15) +
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = 1.96, color = "red", linetype = "dashed") +
  labs(title = " ",
       x = "Z score",
       y = "Density") +
  theme_classic()
plt


ggsave(
  file = file.path(paste(pathfigures, "bunching.pdf", sep = "//")),
  plot = plt,
  width = 6,
  height = 4
)


################################################################################ 
################################ Clearing memory ###############################
################################################################################ 

rm(all_measures, indiv,
   ci, dat, dat_forest, 
   fit, jp_col, pt_bright, pt_hc, pt_mc, uo_blue,
   summary_effect, summary_re_out, summary_lma_out,
   temp_dat, temp_fit, temp_fit_ml, temp_fit_re, 
   b, i, idx, overall_beta, 
   p_1, p_2, fig_forest, y_range, vcov,
   lm1, lm2, lm3, lm4, 
   collabels, covlabels, label, notes, title,
   lma, fig_funnel
)

